﻿using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using org.apache.pdfbox.pdmodel;
using org.apache.pdfbox.util;

namespace Jb.Search.TextExtractor
{
    /// <summary>
    /// <see cref="ITextExtractor"/> implementation that reads the text content
    /// of a PDF file through PDFBox.
    /// </summary>
    public class PdfDocument : ITextExtractor
    {
        /// <summary>
        /// Creates an extractor bound to the given file path.
        /// The file is not opened until <see cref="ExtractText"/> is called.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to extract text from.</param>
        public PdfDocument(String filePath)
        {
            this.FilePath = filePath;
        }

        /// <summary>
        /// FilePath of the corresponding file to extract text from
        /// </summary>
        public String FilePath { get; set; }

        /// <summary>
        /// Extracts text from the PDF document located at <see cref="FilePath"/>.
        /// </summary>
        /// <returns>The text produced by <c>PDFTextStripper.getText</c> for the loaded document.</returns>
        /// <remarks>
        /// The document is loaded on every call and closed before the method
        /// returns, whether extraction succeeds or throws. Exceptions raised by
        /// PDFBox while loading or stripping are not caught here and propagate
        /// to the caller.
        /// </remarks>
        public String ExtractText()
        {
            // If load throws there is nothing to close, so it stays outside the try.
            PDDocument doc = PDDocument.load(FilePath);
            try
            {
                PDFTextStripper stripper = new PDFTextStripper();
                return stripper.getText(doc);
            }
            finally
            {
                // Release the file handle and buffers held by the loaded document;
                // without this every call leaks an open PDDocument.
                doc.close();
            }
        }
    }
}
